%let syear=2014;

* [] brackets indicate redacted internal census variable name or directory that can't be disclosed. ;

*storing employment history, control variables, etc in SAS libraries;
libname controls '[directory location for individual characteristics files]';
libname icf '[directory location for geocoded worker residence files]';
libname hannah  '/projects/users/########';
libname ben   '/projects/users/########';


%macro statepanelworkers;
  %let stcodes = al ar az ca co de dc ia il in ks md me mo mt nd ne nm nv ok tn va wy;
  %local I next_st;
  %let I = 1;
  %do %while (%scan(&stcodes, &I) ne );
    %let next_st = %scan(&stcodes, &I);
      * GET WORKER HISTORIES FOR A STATE;
      *temporary file of all ehf variables, for 2000 or later;
      *Need to combine when person has worked multiple jobs in same quarter, and keep sein from employer most income;
      libname ehf&next_st "/data/mixed/lehd_s2014_ehf/&next_st.";
      DATA work.ehf_&next_st._var;
	SET ehf&next_st..ehf_&next_st (KEEP = year pik [Q1 earnings] [Q2 earnings] [Q3 earnings] [Q4 earnings] [annual earnings] sein seinunit);
	WHERE year>= 2000;
      RUN;

      * TRANSPOSE TO QUARTERLY; 
      PROC SORT data=work.ehf_&next_st._var;
	BY pik year sein seinunit [annual earnings]; 
      RUN;
      PROC TRANSPOSE 
	DATA= work.ehf_&next_st._var
	OUT = work.ehf_&next_st._var;
	BY pik year sein seinunit [annual earnings];
	VAR [Q1 earnings]-[Q4 earnings];
      RUN;
      DATA work.ehf_&next_st._var;
	SET work.ehf_&next_st._var (rename = (col1=earn_qtr));
	qtr = input(substr(_name_,5),5.);
	qtime = (year-1985)*4+qtr;
	drop _name_ _label_;
      RUN;

      * COMBINE MULTIPLE JOBS;
      PROC SORT data=work.ehf_&next_st._var;
	BY pik year qtr earn_qtr; 
      RUN;     
      PROC SQL;
	CREATE TABLE work.ehf_&next_st._var_yr AS 
	SELECT pik, year, qtr, qtime,
	SUM([annual earnings]) AS [annual earnings],
	SUM(earn_qtr) AS earn_qtr
	FROM work.ehf_&next_st._var
	GROUP BY pik, year, qtr, qtime
	ORDER BY pik, year, qtr, qtime;
      QUIT;

      * MERGE WORKER CHARACTERISTICS TO WORKER HISTORIES;
      PROC SQL;
	CREATE TABLE work.ehf_icf_&next_st AS
	SELECT
	  A.*,
	  B.*
	FROM work.ehf_&next_st._var_yr AS a
	  LEFT JOIN work.icf_controls AS b 
	  ON A.pik = B.pik;
      QUIT;

      * MERGE TO WORKER ADDRESSES; 
      PROC SQL;
	CREATE TABLE hannah.ehf_icf_&next_st AS
	SELECT
	  A.*,
	  B.pik,
	  B.[year],
	  B.[county id],
	  B.[latitude],
	  B.[longitude]
	FROM work.ehf_icf_&next_st AS a
	  LEFT JOIN icf.icf_us_addresses AS b
	  ON A.pik = B.pik AND A.year = B.[year];
      QUIT;
      PROC APPEND base = hannah.ehf_icf_states data = hannah.ehf_icf_&next_st;
      RUN;
      PROC DATASETS library = work nolist;
	DELETE ehf_&next_st._var ehf_&next_st._var_yr ehf_icf_&next_st;
      RUN; QUIT; 
      PROC DATASETS library = hannah nolist;
	DELETE ehf_icf_&next_st;
      RUN; QUIT;       
    %let I = %eval(&I + 1);
  %end;
%mend;

* remove any old copies of appended "base" dataset before running macro;
PROC DATASETS library = hannah nolist;
  delete ehf_icf_states;
RUN; QUIT;

* GET WORKER CHARACTERISTICS;
*Temporary files of all necessary control variables from the icf dataset (age, [self reported race] etc);
DATA work.icf_controls;
  SET controls.icf_us (keep = [date of birth] [place of birth] [self reported gender] [self reported race] [self reported ethnicity] [education level] pik);
  BY pik;
RUN;

%statepanelworkers;

PROC DATASETS library = work nolist;
  DELETE icf_controls;
RUN; QUIT; 

* CONSTRUCT VARIABLES;

*Creating new variable 'age', from [date of birth], 'unemp' from earn_qtr;
*Also re-scale lat/lon to y and x by 1000000;
DATA hannah.ehf_icf_states;
  SET hannah.ehf_icf_states;
  yeardate = mdy(1,1,year);
  FORMAT yeardate [date of birth] date9.;
  age=yrdif ([date of birth],yeardate);
  y = [latitude]/1000000;
  x = [longitude]/1000000;
  DROP [latitude] [longitude];
  IF earn_qtr = 0 THEN unemp = 1;
    ELSE unemp = 0;
run;

*Create gender dummies, place of birth, [self reported ethnicity] dummies;
DATA hannah.ehf_icf_states;
  SET hannah.ehf_icf_states;
  IF [self reported gender] = '[female flag]' THEN female = 1;
    ELSE female = 0;
  IF [place of birth] = '[born in usa flag]' THEN bornusa = 1;
    ELSE bornusa = 0;
  IF [self reported ethnicity] = '[hispanic flag]' THEN hispanic = 1;
    ELSE hispanic = 0;
RUN;

*Create [self reported race] and education dummies;
DATA hannah.ehf_icf_states;
  SET hannah.ehf_icf_states;
  
  * create an array for dummies with 3 bytes of storage each;
  * {*} just counts the number of categories;
  ARRAY dummys {*} 3. white black ind_nat asian haw_pi empty more_race;
  DO i = 1 to 7;
    dummys(i) = 0;
  END;
  dummys([self reported race]) = 1;

  ARRAY eddums {*} 3. nohigh highsch somecoll college;
  DO i = 1 to 4;
    eddums(i) = 0;
  END;
  eddums([education level]) = 1;

  DROP empty i [self reported gender] [place of birth] [date of birth] [self reported race] [self reported ethnicity] [education level] [year] yeardate;
RUN;




